import openai
"""
CUDA_VISIBLE_DEVICES=0 swift deploy --model_type qwen1half-7b-chat --model_id_or_path /home/xuyuhang/git/LLM/MedicalGPT/qwen/Qwen1___5-7B-Chat  --ckpt_dir /home/xuyuhang/git/LLM/swift_llm/output/qwen1half-7b-chat/v0-20240607-142846/checkpoint-159  --merge_lora true --host 0.0.0.0 --port 8999
"""
openai.api_key = "EMPTY"
openai.base_url = "http://10.106.153.12:8999/v1/"
model ='qwen1half-7b-chat'
# create a chat completion
completion = openai.chat.completions.create(
  model=model, max_tokens=256,
  messages=[{"role": "user", "content": "你好"}]
)
# print the completion
print(completion.choices[0].message.content)

# Client for a second locally served model endpoint.
# Fix: only `import openai` is in scope in this file, so the bare name
# `OpenAI` raised NameError here — use the namespaced class instead.
client = openai.OpenAI(
    api_key="EMPTY",
    base_url="http://xx.xxx.xx.x:8001/v1/",
)

def get_local_model_response(q, model='qwen2_5-14b-instruct'):
    """Send a single-turn user prompt to the local inference server and return the reply.

    Args:
        q: User prompt text, sent as one "user" message.
        model: Name of a model served by the local endpoint behind ``client``.

    Returns:
        The assistant's reply text (content of the first completion choice).
    """
    # Uses the module-level `client`; replies are capped at 256 tokens.
    completion = client.chat.completions.create(
        model=model,
        max_tokens=256,
        messages=[{"role": "user", "content": q}]
    )
    return completion.choices[0].message.content
